This is the second script in the processing pipeline for IMC data.
The goal of this script is to identify and label islets. This is done using a UNET-based deep learning segmentation model (see the islet_segmentation folder in this repository for details and model training).
Main steps:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import sys
import torch
/opt/conda/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
from collections import defaultdict
from cv2 import legacy
from itertools import chain
from pathlib import Path
from random import sample
from scipy import ndimage as ndi
from scipy.sparse.csgraph import minimum_spanning_tree
from skimage import measure, color, exposure
from skimage.registration import optical_flow_tvl1
from skimage.segmentation import expand_labels, mark_boundaries
from skimage.transform import warp
from tqdm import tqdm
from math import sqrt
from steinbock import io
from steinbock.segmentation import deepcell
sys.path.append("../islet_segmentation")
import helpers
print(sys.path)
print(sys.executable)
['/home/ubuntu/bbvolume/Git/T1D_preprocessing/processing', '/opt/conda/lib/python39.zip', '/opt/conda/lib/python3.9', '/opt/conda/lib/python3.9/lib-dynload', '', '/opt/conda/lib/python3.9/site-packages', '../islet_segmentation'] /opt/conda/bin/python
Paths to input and output folders as well as antibody panels were exported by the first script (01_Preprocessing.ipynb). Here they are imported again.
# Re-import the folder paths exported by the first script (01_Preprocessing.ipynb)
with open("./variables/folders.txt", "rb") as handle:
    folders = pickle.loads(handle.read())
# Display the imported mapping of folder roles -> PosixPath
folders
{'raw': PosixPath('/home/ubuntu/Data3/processing/raw'),
'img': PosixPath('/home/ubuntu/Data3/processing/img'),
'seg_cells': PosixPath('/home/ubuntu/Data3/processing/seg_cells'),
'seg_islets': PosixPath('/home/ubuntu/Data3/processing/seg_islets'),
'masks_cells': PosixPath('/home/ubuntu/Data3/processing/masks_cells'),
'masks_islets': PosixPath('/home/ubuntu/Data3/processing/masks_islets'),
'data_cells': PosixPath('/home/ubuntu/Data3/processing/data_cells'),
'data_islets': PosixPath('/home/ubuntu/Data3/processing/data_islets'),
'data': PosixPath('/home/ubuntu/Data3/processing'),
'git': PosixPath('/home/ubuntu/bbvolume/Git/T1D_preprocessing/processing')}
# Re-import the antibody panels exported by the first script
with open("./variables/panels.txt", "rb") as handle:
    panels = pickle.loads(handle.read())
# Preview the first rows of each panel data frame
for panel_name, panel in panels.items():
    print(panel_name, "\n", panel.head())
# Panel names ("Islet", "Immune") are reused throughout the notebook
panel_names = list(panels.keys())
Islet
channel metal name antibody_clone keep isletseg deepcell \
0 0 Y89 Ghrelin 883622 1 NaN NaN
1 1 In113 Histone H3 D1H2 1 NaN 1.0
2 2 In115 Biotin 1D4-C5 1 NaN NaN
6 6 La139 Somatostatin ICDCLS 1 NaN NaN
7 7 Ce140 Insulin 19H4L12 1 NaN NaN
dimred clustering short_name
0 1.0 1.0 GHRL
1 0.0 0.0 H3
2 1.0 0.0 HBP
6 1.0 1.0 SST
7 1.0 1.0 INS
Immune
channel metal name antibody_clone keep isletseg deepcell \
0 0 Y89 MPO pAb 1 NaN NaN
1 1 In113 Histone H3 D1H2 1 NaN 1.0
2 2 In115 SMA 1A4 1 NaN NaN
6 6 La139 Somatostatin ICDCLS 1 NaN NaN
7 7 Ce140 Insulin 19H4L12 1 NaN NaN
dimred clustering short_name
0 1 1 MPO
1 0 0 H3
2 1 1 SMA
6 1 1 SST
7 1 1 INS
Segmentation stacks are generated by aggregating the channels selected in the panel.csv files (value = 1 in the column isletseg).
Different functions can be used to aggregate channels. Default: np.mean, for other options, see https://numpy.org/doc/stable/reference/routines.statistics.html#averages-and-variances.
# Define image preprocessing options
channelwise_zscore = False
channelwise_minmax = True
# Function used to aggregate the selected channels into one segmentation image
aggr_func = np.mean
# Initialize image data frames to store image metadata
image_dfs = dict.fromkeys(panel_names)
for pan_idx, panel_name in enumerate(panels):
    image_dfs[panel_name] = pd.DataFrame(columns=["image_name", "width", "height"])
for panel_name, panel in panels.items():
    print("Processing", panel_name, "panel")
    # Define channels to use for segmentation: channels with isletseg == 0
    # become NaN (presumably excluded from the aggregation by steinbock --
    # TODO confirm against steinbock's channel_groups semantics)
    islet_channels = panel["isletseg"].values
    islet_channels = np.where(islet_channels == 0, np.nan, islet_channels)
    # Define the input folder
    img_subdir = folders["img"] / panel_name
    seg_subdir = folders["seg_islets"] / panel_name
    seg_subdir.mkdir(exist_ok=True)
    # Create and save segmentation images
    for img_path in tqdm(io.list_image_files(img_subdir)):
        islet_img = deepcell.create_segmentation_stack(
            img = io.read_image(img_path),
            channelwise_minmax = channelwise_minmax,
            channelwise_zscore = channelwise_zscore,
            channel_groups = islet_channels,
            aggr_func = aggr_func
        )
        segstack_file = seg_subdir / (str(img_path.name))
        io.write_image(islet_img, segstack_file)
        # Add image metadata to the data frame
        # (islet_img is channel-first, so shape[2] is width, shape[1] is height)
        image_dfs[panel_name] = pd.concat(
            [image_dfs[panel_name], pd.DataFrame({
                "image_name": img_path.stem,
                "width": islet_img.shape[2],
                "height": islet_img.shape[1]},
                index = [len(image_dfs[panel_name].index)])])
Processing Islet panel
100%|███████████████████████████████████████████████████████████████████████| 7752/7752 [35:52<00:00, 3.60it/s]
Processing Immune panel
100%|███████████████████████████████████████████████████████████████████████| 7752/7752 [56:24<00:00, 2.29it/s]
Display a few randomly-selected islet segmentation images.
# Settings
## number of images per panel to show
nb_images_to_show = 5
## adjust image max intensity if needed (lower value = higher intensity)
max_intensity = 0.25
# Randomly select images (sampled from the first panel; the matching image
# of the other panel is derived by replacing the panel name in the file name)
seg_subdir0 = folders["seg_islets"] / panel_names[0]
segstacks = sorted(seg_subdir0.glob("*.tiff"))
rng = np.random.default_rng()
indexes = rng.choice(len(segstacks), nb_images_to_show, replace=False)
# Plot: one row per sampled image, one column per panel
fig, axs = plt.subplots(nb_images_to_show, 2,
                        figsize=(12, 4 * nb_images_to_show))
for i,idx in enumerate(indexes):
    for j, (panel_name, panel) in enumerate(panels.items()):
        ## load images
        seg_subdir = folders["seg_islets"] / panel_name
        img_name = segstacks[idx].name.replace(panel_names[0], panel_name)
        img = io.read_image(seg_subdir / img_name)
        ## plot images
        axs[i,j].imshow(np.squeeze(img), vmin=0, vmax=max_intensity)
        axs[i,j].set_title(img_name)
        axs[i,j].axis('off')
Here we define a function that filters islets by size and shape.
Filtering settings
# Islet filtering thresholds (consumed by `filter_islets` below)
min_object_size = 50 ## minimum number of pixels per islet
max_eccentricity = 0.95 ## max islet eccentricity
Filtering function
The filter_islets function takes the following steps:
# (Reconstructed cell text) Objects smaller than the minimum size are removed,
# and overly elongated objects are removed unless they are large.
def filter_islets(mask, min_obj_size, max_eccent):
    """Filter islet objects of a binary mask by size and shape.

    Parameters
    ----------
    mask : 2D uint8 array; islet pixels are expected at 255 (only values
        above 254 survive the initial threshold).
    min_obj_size : minimum number of pixels an object must have to be kept.
    max_eccent : maximum eccentricity; more elongated objects are dropped
        unless their area is at least 5 * min_obj_size.

    Returns
    -------
    2D boolean mask of the retained objects, with holes filled.
    """
    # Identify objects and measure their properties
    ret, mask = cv2.threshold(mask, 254, 255, cv2.THRESH_BINARY)
    labels = measure.label(mask, background=0)
    region_props = measure.regionprops(labels)
    # Filter the objects.
    # BUG FIX: the original body compared against the global variables
    # `min_object_size` / `max_eccentricity` instead of the function
    # parameters, silently ignoring whatever thresholds the caller passed.
    labels_to_remove = []
    for prop in region_props:
        if (prop.area < min_obj_size or (
                prop.eccentricity > max_eccent and prop.area < 5 * min_obj_size)):
            labels_to_remove.append(prop.label)
    labels = np.where(np.isin(labels, labels_to_remove), 0, labels)
    # Fill holes so each retained islet is a solid region
    # NOTE(review): astype("uint8") would wrap if there were > 255 objects --
    # harmless here because the result is immediately re-binarized.
    ret, bin_mask = cv2.threshold(labels.astype("uint8"), 0, 255, cv2.THRESH_BINARY)
    bin_mask = ndi.binary_fill_holes(bin_mask).astype(bool)
    return bin_mask
Complete the image metadata data frame
This data frame contains all image names, as well as images width and height.
# Complete the image metadata frames and merge them across panels.
# Image names are assumed to follow "<case>_<panel>_ROI_<acq>".
for pan_idx, panel_name in enumerate(panels):
    # List input islet segmentation images
    seg_subdir = folders["seg_islets"] / panel_name
    input_images = sorted([x.name for x in Path.iterdir(seg_subdir) if x.name.endswith(".tiff")])
    print("Number of images in", panel_name, "panel:", len(input_images))
    # Add Case IDs and Panels (n=0 means "all splits" for str.split)
    image_dfs[panel_name]["images"] = image_dfs[panel_name]["image_name"] + ".tiff"
    df_meta = image_dfs[panel_name]["image_name"].str.split("_", n = 0, expand = True)
    image_dfs[panel_name]["case_id"] = df_meta[0]
    image_dfs[panel_name]["panel"] = df_meta[1]
    image_dfs[panel_name]["acq_id"] = df_meta[3]
    # Add paths to images
    image_dfs[panel_name]["dir_images"] = folders["seg_islets"] / panel_name
# Merge the data frames (one row per image across both panels)
all_image_dfs = pd.concat([df for i,df in image_dfs.items()], ignore_index=True)
all_image_dfs
Number of images in Islet panel: 7752 Number of images in Immune panel: 7752
| image_name | width | height | images | case_id | panel | acq_id | dir_images | |
|---|---|---|---|---|---|---|---|---|
| 0 | 6034_Islet_ROI_001 | 243 | 309 | 6034_Islet_ROI_001.tiff | 6034 | Islet | 001 | /home/ubuntu/Data3/processing/seg_islets/Islet |
| 1 | 6034_Islet_ROI_002 | 232 | 262 | 6034_Islet_ROI_002.tiff | 6034 | Islet | 002 | /home/ubuntu/Data3/processing/seg_islets/Islet |
| 2 | 6034_Islet_ROI_003 | 295 | 304 | 6034_Islet_ROI_003.tiff | 6034 | Islet | 003 | /home/ubuntu/Data3/processing/seg_islets/Islet |
| 3 | 6034_Islet_ROI_004 | 279 | 309 | 6034_Islet_ROI_004.tiff | 6034 | Islet | 004 | /home/ubuntu/Data3/processing/seg_islets/Islet |
| 4 | 6034_Islet_ROI_005 | 274 | 247 | 6034_Islet_ROI_005.tiff | 6034 | Islet | 005 | /home/ubuntu/Data3/processing/seg_islets/Islet |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 15499 | 8011_Immune_ROI_074 | 395 | 408 | 8011_Immune_ROI_074.tiff | 8011 | Immune | 074 | /home/ubuntu/Data3/processing/seg_islets/Immune |
| 15500 | 8011_Immune_ROI_075 | 287 | 296 | 8011_Immune_ROI_075.tiff | 8011 | Immune | 075 | /home/ubuntu/Data3/processing/seg_islets/Immune |
| 15501 | 8011_Immune_ROI_076 | 342 | 334 | 8011_Immune_ROI_076.tiff | 8011 | Immune | 076 | /home/ubuntu/Data3/processing/seg_islets/Immune |
| 15502 | 8011_Immune_ROI_077 | 368 | 378 | 8011_Immune_ROI_077.tiff | 8011 | Immune | 077 | /home/ubuntu/Data3/processing/seg_islets/Immune |
| 15503 | 8011_Immune_ROI_078 | 355 | 313 | 8011_Immune_ROI_078.tiff | 8011 | Immune | 078 | /home/ubuntu/Data3/processing/seg_islets/Immune |
15504 rows × 8 columns
Create the input set
# Side length (pixels) the U-Net inputs are resized to by the augmentations
img_size = 160
image_set = helpers.PredictDataset(all_image_dfs, helpers.get_test_augmentations(img_size))
# Sanity check: one dataset entry per metadata row
assert(len(image_set) == len(all_image_dfs.index)), f"Data frame and image set have different sizes"
Generation of the islet segmentation model is documented in the islet_segmentation folder of this repository.
The model can also be directly downloaded from zenodo UPDATE: ADD LINK TO THE MODEL
# Load the pre-trained U-Net islet segmentation model and switch it to
# inference mode (disables dropout/batch-norm updates)
islet_model = helpers.IsletModel()
islet_model.load_state_dict(torch.load(folders["data"] / "islet_segmentation_model.pt"))
islet_model.eval();
Islet masks are predicted based on the U-NET model, resized to the original image size, and filtered using the filter_islets function defined above.
# Predict, resize, filter, and save one binary islet mask per image.
# Fixes over the original cell:
#  - inference wrapped in `torch.no_grad()` so no autograd graph is kept
#    (makes the separate `torch.detach` call unnecessary);
#  - `cv2.resize` interpolation passed by keyword: the third POSITIONAL
#    argument of `cv2.resize` is `dst`, not `interpolation`, so the original
#    `cv2.INTER_CUBIC` flag was not applied as intended.
for idx in tqdm(range(len(image_set))):
    # Predict the mask (sigmoid -> probability, threshold at 0.5)
    image = image_set[idx]
    with torch.no_grad():
        logits_mask = islet_model(image.unsqueeze(0))
    mask = torch.sigmoid(logits_mask)
    mask = (mask > 0.5) * 255.0
    mask = mask.squeeze().numpy()
    # Resize the mask to original image size (cv2 expects (width, height))
    dim = (all_image_dfs.loc[idx, "width"],
           all_image_dfs.loc[idx, "height"])
    resized_mask = cv2.resize(mask, dim, interpolation=cv2.INTER_CUBIC)
    # Filter the objects by size and shape
    filtered_mask = filter_islets(resized_mask, min_object_size, max_eccentricity)
    # Save the binary mask (suffixed "_bin"; relabeled later)
    mask_file = all_image_dfs.loc[idx, "image_name"] + "_bin.tiff"
    mask_path = folders["masks_islets"] / all_image_dfs.loc[idx, "panel"]
    mask_path.mkdir(exist_ok=True)
    io.write_mask(filtered_mask, mask_path / mask_file)
0%| | 0/15504 [00:00<?, ?it/s][W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware. 100%|█████████████████████████████████████████████████████████████████████| 15504/15504 [29:11<00:00, 8.85it/s]
Display a few randomly-selected images overlaid with newly-generated islet segmentation masks.
# Number of images per panel to show
nb_images_to_show = 5
# Randomly select images (sampled from the first panel's masks)
seg_masks_dir0 = folders["masks_islets"] / panel_names[0]
segmasks = sorted(seg_masks_dir0.glob("*.tiff"))
rng = np.random.default_rng()
indexes = rng.choice(len(segmasks), nb_images_to_show, replace=False)
# Plot: one row per sampled image, one column per panel
fig, axs = plt.subplots(nb_images_to_show, 2,
                        figsize=(12, 4 * nb_images_to_show))
for i,idx in enumerate(indexes):
    for j, (panel_name, panel) in enumerate(panels.items()):
        ## load images and masks
        seg_subdir = folders["seg_islets"] / panel_name
        mask_subdir = folders["masks_islets"] / panel_name
        img_name = segmasks[idx].name.replace(panel_names[0], panel_name)
        img = io.read_image(seg_subdir / img_name.replace("_bin.tiff", ".tiff"))
        mask = io.read_mask(mask_subdir / img_name)
        ## overlay mask borders on images
        cur_img = np.squeeze(color.gray2rgb(img*255).astype('uint8'))
        borders = mark_boundaries(cur_img, mask*255, mode='thick').astype('uint8')
        # addWeighted brightens the image (factor 2) and adds border pixels
        overlay = cv2.addWeighted(cur_img, 2, borders*255, 0.5, 0)
        ## plot images
        axs[i,j].imshow(overlay)
        axs[i,j].set_title(img_name)
        axs[i,j].axis('off')
Here, islets on the same image are merged according to their sizes and distances (see the Islet merging functions below).
Then, islets from consecutive sections that are positioned at the same place on the consecutive images after optical flow registration are assigned the same label (see the Islet relabeling functions below). Islets which don't have a corresponding islet on the consecutive image are assigned unique labels. With this, it is possible to know in the downstream analysis that islets which have the same label on consecutive images are actually the same islet.
This approach works for two panels applied to two consecutive sections.
These functions are used to merge islets based on their distance and on their relative sizes.
Compared to a merge based only on distances, this approach distinguishes between fragmented islets (one large islet with smaller islet cell patches around it) and two islets located next to each other: two large islets will merge less easily than a large islet and a smaller islet cell patch located at the same distance from each other.
Steps
The merge_objects function merges objects in a binary mask based on their distance and sizes:
# (Reconstructed cell text) Pairwise distances come from `calculate_distances`
# and pairwise costs from `calculate_merging_cost`; object pairs whose cost
# exceeds `max_cost` are never merged.
def merge_objects(binary_mask, weight_dist, weight_area, max_cost):
    """Merge objects of a binary mask by distance and relative size.

    Returns a labeled mask in which merged objects share one label.
    """
    # Label connected components of the binary mask
    labeled = measure.label(binary_mask)
    # Pairwise object distances and per-object areas feed the cost matrix
    pair_distances = calculate_distances(labeled)
    object_areas = measure.regionprops_table(labeled, properties = ['area'])['area']
    cost = calculate_merging_cost(labeled, pair_distances, object_areas,
                                  weight_dist, weight_area)
    # Zero out prohibitively expensive pairs: 0 means "no edge" for the MST
    cost = np.where(cost > max_cost, 0, cost)
    # Edges of the minimum spanning tree decide which objects are merged;
    # +1 converts 0-based matrix indices back to 1-based label indices
    tree = minimum_spanning_tree(cost)
    edge_list = np.column_stack(tree.nonzero()) + 1
    # Relabel connected components of the tree with a shared label
    return merge_edges(labeled, edge_list)
The calculate_distances function measures and returns the distances between all object pairs in a labeled mask.
# Calculate distances between labeled objects
def calculate_distances(labeled_mask):
    """Pairwise minimum edge-to-edge distances between labeled objects.

    Returns an (n, n) symmetric matrix for the n positive labels; the row
    and column for the background (assumed label 0) are sliced off.
    """
    label_values = np.unique(labeled_mask)
    n = len(label_values)
    dist_matrix = np.zeros((n, n))
    # One distance transform per object, reused for all later objects
    for a in range(n):
        dmap = ndi.distance_transform_edt(labeled_mask != label_values[a])
        for b in range(a + 1, n):
            dist_matrix[a, b] = dmap[labeled_mask == label_values[b]].min()
    # Mirror the upper triangle to make the matrix symmetric
    dist_matrix = np.maximum(dist_matrix, dist_matrix.T)
    return dist_matrix[1:, 1:]
The calculate_merging_cost function calculates the merging cost for each object pair of a labeled mask.
The total cost is the sum of the weighted distance cost and the weighted area cost.
The weighting values are user-provided : weight_dist and weight_area, respectively:
# (Reconstructed cell text)
#   merging cost = weight_dist * distance_cost + weight_area * area_cost
#   area_cost     = |total area of the two objects - their area difference|
#   distance_cost = distance between the two objects
def calculate_merging_cost(labeled_mask, distances, areas, weight_dist, weight_area):
    """Merging cost for every pair of objects in a labeled mask.

    `distances` and `areas` are indexed by object (background excluded),
    matching the output of `calculate_distances` / regionprops areas.
    Returns a symmetric (n, n) cost matrix.
    """
    object_labels = np.unique(labeled_mask)
    n_objects = np.count_nonzero(object_labels > 0)
    cost = np.zeros((n_objects, n_objects))
    for a in range(n_objects):
        sqrt_a = sqrt(areas[a])
        for b in range(n_objects):
            if a == b:
                continue
            sqrt_b = sqrt(areas[b])
            # Two similarly sized (large) objects are costlier to merge than
            # a large object plus a small fragment at the same distance
            size_diff = abs(sqrt_a - sqrt(areas[b]))
            size_sum = sqrt_a + sqrt_b
            cost[a, b] = (weight_dist * distances[a, b]
                          + weight_area * abs(size_sum - size_diff))
    # Mirror to guarantee symmetry
    return np.maximum(cost, cost.T)
The merge_edges function merges labeled objects if they share an edge in the minimum spanning tree (connected components).
It uses the disjoint-set data structure to keep track of the connected components in the minimum spanning tree. It first initializes the parents and ranks dictionaries for each label. The find function is used to find the representative element of a set and the union function is used to merge the connected components.
def merge_edges(labeled_mask, edges):
    """Merge labeled objects connected by an edge (disjoint-set union-find).

    Parameters
    ----------
    labeled_mask : integer array of object labels (0 = background).
    edges : iterable of (i, j) pairs indexing into np.unique(labeled_mask).

    Returns
    -------
    Array of the same shape where each object carries the representative
    label of its connected component.
    """
    labels = np.unique(labeled_mask)
    # Disjoint-set forest: every label starts as its own root.
    # (Fix: the original made an unused copy of `labeled_mask` and seeded
    # `parents` with a defaultdict of None before overwriting every entry;
    # both removed.)
    parents = {label: label for label in labels}
    ranks = defaultdict(int)

    def find(x):
        # Find the set representative, with path compression
        if parents[x] != x:
            parents[x] = find(parents[x])
        return parents[x]

    def union(x, y):
        # Merge the sets of x and y, by rank
        px, py = find(x), find(y)
        if px != py:
            if ranks[px] > ranks[py]:
                parents[py] = px
            else:
                parents[px] = py
                if ranks[px] == ranks[py]:
                    ranks[py] += 1

    for edge in edges:
        union(labels[edge[0]], labels[edge[1]])
    # Rewrite every object with its component representative
    relabeled_mask = np.zeros_like(labeled_mask)
    for label in labels:
        relabeled_mask[labeled_mask == label] = find(label)
    return relabeled_mask
These functions are used to measure the overlap between all object pairs in two consecutive images, and to assign the same labels to overlapping objects.
The get_regionprops_overlap function takes two labeled masks from consecutive images as input and returns a mapping table (regprops) containing the overlap between all pair of objects (with one object on the first image and the other object on the consecutive image).
def get_regionprops_overlap(labels_panel1, labels_panel2):
    """Overlap table between the objects of two labeled masks.

    Entry [r, c] is the mean of the binary image of panel-1 object `c`
    inside panel-2 object `r`, i.e. the fraction of object `r` covered by
    object `c`. Rows are the positive labels of `labels_panel2`; columns
    run over 1..max(labels_panel1) (missing labels yield zero columns).

    Fixes: `set_axis(..., inplace=True)` was removed in pandas 2.0, so the
    returned frame is assigned instead; columns are collected and
    concatenated once rather than via quadratic concat-per-iteration.
    """
    row_index = np.unique(labels_panel2[labels_panel2 > 0])
    columns = []
    for i in range(1, np.max(labels_panel1) + 1):
        # Binary image of panel-1 object i, averaged over panel-2 objects
        cur_object = np.where(labels_panel1 == i, 1, 0)
        cur_props = pd.DataFrame(measure.regionprops_table(
            labels_panel2, cur_object, properties = ['intensity_mean']))
        cur_props.rename(columns = {'intensity_mean': i}, inplace = True)
        cur_props = cur_props.set_axis(row_index)
        columns.append(cur_props)
    return pd.concat(columns, axis=1) if columns else pd.DataFrame()
The relabel_masks function takes a labeled mask and a mapping table (regprops) of overlapping objects.
If the overlap between an object c of the mask and an object n of the consecutive mask exceeds the overlap threshold (`overlap_thresh` in parameters below), then object c is relabeled as n. Objects without a sufficiently overlapping counterpart receive a new unique label (`nonoverlap_lab` variable) that is larger than all existing labels in the mapping table. Finally, the function returns the relabeled mask, the mapping between old and new labels (e.g., {c: n, d: y}), and the incremented `nonoverlap_lab` value.
def relabel_masks(mask_labeled, regprops, threshold, nonoverlap_lab) :
    """Relabel `mask_labeled` from an overlap mapping table.

    `regprops` rows are labels of `mask_labeled`; columns are candidate new
    labels. Each object takes the smallest candidate label whose overlap
    reaches `threshold`; objects with no match get `nonoverlap_lab`, which
    is then incremented. Returns (relabeled mask, old->new mapping,
    updated nonoverlap_lab).
    """
    label_map = dict()
    mask_relabeled = mask_labeled.copy()
    for old_lab in regprops.index:
        overlaps = regprops.loc[old_lab]
        # Candidate new labels with sufficient overlap; np.min of an empty
        # pandas Index yields NaN, which signals "no match" below
        matched = overlaps[overlaps >= threshold].index
        new_lab = np.min(matched)
        if np.isnan(new_lab):
            # No overlapping counterpart: assign a fresh unique label
            mask_relabeled[mask_labeled == old_lab] = nonoverlap_lab
            label_map.update({old_lab: nonoverlap_lab})
            nonoverlap_lab += 1
        else:
            mask_relabeled[mask_labeled == old_lab] = new_lab
            label_map.update({old_lab: new_lab})
    return mask_relabeled, label_map, nonoverlap_lab
The pad_images function takes two images as input. It calculates which image has the larger width and which image has the larger height. It then pads the image with the smaller width and the image with the smaller height with a uniform color defined by the pad_color variable. Finally, it returns the two padded images, which have the same size, and the offset coordinates, which can later be used to crop-out the paddings.
def pad_images(image1, image2, pad_color):
    """Pad two images with `pad_color` so both reach the same (maximal) size.

    Each original is centered in its padded canvas. Returns the two padded
    images plus, for each, the (y0, y1, x0, x1) coordinates of the original
    inside its canvas, usable later to crop the padding away.

    NOTE(review): the canvases are forced to uint8, so label masks with
    values > 255 would be truncated -- confirm object counts stay below 256.
    """
    h1, w1 = np.squeeze(image1).shape
    h2, w2 = np.squeeze(image2).shape
    canvas_h = np.maximum(h1, h2)
    canvas_w = np.maximum(w1, w2)
    # Centering offsets: half the size difference, clamped to zero
    top1, left1 = max((h2 - h1) // 2, 0), max((w2 - w1) // 2, 0)
    top2, left2 = max((h1 - h2) // 2, 0), max((w1 - w2) // 2, 0)
    # Uniformly colored canvases
    image1_padded = np.full((canvas_h, canvas_w), pad_color, dtype=np.uint8)
    image2_padded = np.full((canvas_h, canvas_w), pad_color, dtype=np.uint8)
    # Paste each original into the center of its canvas
    image1_padded[top1:top1 + h1, left1:left1 + w1] = image1.copy()
    image2_padded[top2:top2 + h2, left2:left2 + w2] = image2.copy()
    return (image1_padded, image2_padded,
            (top1, top1 + h1, left1, left1 + w1),
            (top2, top2 + h2, left2, left2 + w2))
Here, the functions defined above are used to match islets on consecutive sections and relabel them accordingly.
Steps (for details, see descriptions of the functions above)
Steps (for details, see the descriptions of the functions above):
1. The masks of the two consecutive sections are labeled, giving `mask1_init_labels` and `mask2_init_labels`.
2. Images and masks are padded to a common size with the `pad_images` function.
3. Optical-flow registration learned on the images is applied to the first-panel mask (`mask1_corrected`).
4. The corrected masks are filtered with the `filter_islets` function, as already done after islet segmentation, then merged with `merge_objects`.
5. When the overlap between objects of consecutive sections exceeds `overlap_thresh`, the mask1 objects are relabeled as in mask2 (`mask1_relabeled`).
6. The initial masks `mask1_init_labels` and `mask2_init_labels` are relabeled according to the relabeled masks `mask1_relabeled` and `mask2_relabeled`.

User-defined parameters:
- `distance_weight`: when calculating the merging cost, multiplier applied to the distance between the objects.
- `area_weight`: when calculating the merging cost, multiplier applied to the area cost (see the `calculate_merging_cost` function above for details).
- `max_cost`: maximal merging cost, above which two objects are not merged.
- `overlap_thresh`: fraction of overlap above which two objects from consecutive sections are considered as overlapping.
# Islet merging parameters (on the same image)
# Cost multipliers consumed by `calculate_merging_cost`
distance_weight = 3
area_weight = 1.25
# Object pairs with a merging cost above this value are never merged
max_cost = 250
# Islet alignment parameters (on consecutive images)
# Minimum overlap fraction for two objects on consecutive sections to be
# treated as the same islet
overlap_thresh = 0.1
Processing
# Define input folders
img1_subdir = folders["seg_islets"] / panel_names[0]
img2_subdir = folders["seg_islets"] / panel_names[1]
masks1_subdir = folders["masks_islets"] / panel_names[0]
masks2_subdir = folders["masks_islets"] / panel_names[1]
# Temporary binary masks produced by the prediction step ("*_bin.tiff")
temp_masks_panel1 = list(m for m in io.list_image_files(masks1_subdir) if "_bin" in str(m))
for file in tqdm(temp_masks_panel1):
    # The matching panel-2 file is assumed to differ only by the panel name
    file1 = file.name
    file2 = file.name.replace(panel_names[0], panel_names[1])
    # Final (relabeled) masks are written without the "_bin" suffix
    out_mask_name1 = masks1_subdir / file1.replace("_bin", "")
    out_mask_name2 = masks2_subdir / file2.replace("_bin", "")
    # Skip pairs already processed so the loop can resume after interruption
    if(not(Path.exists(out_mask_name1) and Path.exists(out_mask_name2))):
        # Read masks and assign initial labels
        mask1 = cv2.imread(str(masks1_subdir / file1), cv2.IMREAD_UNCHANGED)
        mask2 = cv2.imread(str(masks2_subdir / file2), cv2.IMREAD_UNCHANGED)
        mask1_init_labels = measure.label(mask1)
        mask2_init_labels = measure.label(mask2)
        # Registration/relabeling is only attempted when at least one mask has
        # more than one object.
        # NOTE(review): when both masks hold <= 1 object, the raw 0/255 binary
        # masks are saved unchanged (value 255 instead of small label ids) --
        # confirm downstream readers handle this.
        if (np.amax(mask1_init_labels) > 1) or (
            np.amax(mask2_init_labels) > 1):
            # Read images and convert them to 8-bit
            img1 = cv2.imread(str(img1_subdir / file1.replace("_bin", "")), cv2.IMREAD_UNCHANGED)
            img2 = cv2.imread(str(img2_subdir / file2.replace("_bin", "")), cv2.IMREAD_UNCHANGED)
            img1 = (img1 * 255).astype('uint8')
            img2 = (img2 * 255).astype('uint8')
            # Pad the smaller image and mask, to make sure both images/masks have the same size
            img1_padded, img2_padded, img1_off, img2_off = pad_images(img1, img2, (0))
            mask1_padded, mask2_padded, mask1_off, mask2_off = pad_images(
                mask1_init_labels, mask2_init_labels, (0)
            )
            # Learn optical flow registration based on images
            v, u = optical_flow_tvl1(img1_padded, img2_padded, attachment=50, num_warp=50)
            row_coords, col_coords = np.meshgrid(np.arange(img2_padded.shape[0]),
                                                 np.arange(img2_padded.shape[1]),
                                                 indexing = 'ij')
            # Apply learnt registration to the first panel mask.
            # NOTE(review): `warp` rescales the uint8 labeled mask to [0, 1],
            # so * 255 restores approximate label values -- verify labels
            # survive this round-trip for masks with many objects.
            dest_mat = np.array([row_coords - v, col_coords - u])
            mask1_corrected = (warp(mask1_padded, dest_mat, mode='edge') * 255).astype('uint8')
            # Filter and merge the corrected masks
            ret, mask1_thresh = cv2.threshold(mask1_corrected, 0, 255, cv2.THRESH_BINARY)
            ret, mask2_thresh = cv2.threshold(mask2_padded, 0, 255, cv2.THRESH_BINARY)
            mask1_filtered = filter_islets(mask1_thresh, min_object_size, max_eccentricity)
            mask2_filtered = filter_islets(mask2_thresh, min_object_size, max_eccentricity)
            mask1_labels = merge_objects(mask1_filtered, distance_weight, area_weight, max_cost)
            mask2_labels = merge_objects(mask2_filtered, distance_weight, area_weight, max_cost)
            if (np.amax(mask1_labels) > 0) and (np.amax(mask2_labels) > 0):
                # Match islets from consecutive sections (based on overlap) and relabel first panel mask
                region_props1 = get_regionprops_overlap(mask2_labels, mask1_labels)
                # Unmatched islets receive fresh labels above all existing ones
                cur_unmatched_label = int(np.max([np.max(region_props1.index),
                                                  np.max(region_props1.columns)])) + 1
                mask1_relabeled, new_labels1, cur_unmatched_label = relabel_masks(
                    mask1_labels, region_props1, overlap_thresh, cur_unmatched_label
                )
                # Match islets from consecutive sections and relabel second panel mask
                region_props2 = get_regionprops_overlap(mask1_relabeled, mask2_labels)
                mask2_relabeled, new_labels2, cur_unmatched_label = relabel_masks(
                    mask2_labels, region_props2, overlap_thresh, cur_unmatched_label
                )
                # Re-label initial mask1 with the new labels that match the second panel labels
                region_props_final1 = get_regionprops_overlap(mask1_relabeled, mask1_corrected)
                mask1_final, new_labels_final1, cur_unmatched_label = relabel_masks(
                    mask1_init_labels, region_props_final1, overlap_thresh, cur_unmatched_label
                )
                # Re-label initial mask2 with the new labels that match the first panel labels
                region_props_final2 = get_regionprops_overlap(mask2_relabeled, mask2_padded)
                mask2_final, new_labels_final2, cur_unmatched_label = relabel_masks(
                    mask2_init_labels, region_props_final2, overlap_thresh, cur_unmatched_label
                )
            else:
                # No objects survived filtering/merging in one of the panels
                mask1_final = mask1_labels.copy()
                mask2_final = mask2_labels.copy()
        else:
            mask1_final = mask1.copy()
            mask2_final = mask2.copy()
        # Save re-labeled masks and remove temporary masks
        io.write_mask(mask1_final, out_mask_name1)
        io.write_mask(mask2_final, out_mask_name2)
27%|██████████████████▏ | 2130/7752 [5:36:32<20:30:53, 13.14s/it]
Acquisition and islet masks from consecutive sections (panels) are displayed side-by-side to visually confirm that:
All images in the dataset were checked for quality and problematic images (e.g., bad quality, failed islet segmentation) were manually flagged for deletion.
The following variables can be adjusted:
# Display only images from the following patient (4-digits case id)
# if `None`, images from all patients will be displayed
case_id = None
# Select the number of images to show
# if `None`, all the images will be displayed
nb_img_to_show = 10
# Adjust image max intensity if needed (lower value = higher intensity)
max_intensity = 0.5
# Adjust mask transparency
overlay_alpha = 0.33
# Color palette: tab20 as 8-bit RGBA values
cmap = plt.cm.tab20
cmaplist = (np.array([cmap(i) for i in range(cmap.N)]) * 255).astype('uint8')
# Drop the alpha channel (column 3), keeping RGB only
cmaplist = np.delete(cmaplist, 3, 1)
# Prepend black for the background (label 0) and repeat the 20-color palette.
# NOTE(review): labels above 40 would still index out of range -- confirm
# per-image label counts stay below that.
cmaplist = np.vstack([[0, 0, 0], cmaplist, cmaplist])
Select images
# List all the masks for the first panel
panel1_islet_masks_dir = folders['masks_islets'] / panel_names[0]
img_to_show = [mask for mask in io.list_mask_files(
panel1_islet_masks_dir) if not mask.name.endswith("_bin.tiff")]
# If a specific case is selected, subset the images from this case
if case_id is not None:
img_to_show = [mask for mask in img_to_show if mask.name.startswith(str(case_id))]
# If a number of `nb_img_to_show` is not `None`, randomly subset that number of images
if nb_img_to_show is not None:
rng = np.random.default_rng()
idx = rng.choice(len(img_to_show),
np.minimum(nb_img_to_show, len(img_to_show)), replace = False)
img_to_show = [img_to_show[i] for i in idx]
# Count and display the final number of images to display
nb_img_to_show = len(img_to_show)
print(str(nb_img_to_show) + " images will be displayed")
if (nb_img_to_show > 50):
print("WARNING - More than 50 images are selected for display")
10 images will be displayed
Display
# One row per image; columns 0-1 show raw images, columns 2-3 the overlays
# (layout assumes exactly two panels)
fig, ax = plt.subplots(nb_img_to_show, 2 * len(panel_names),
                       figsize=(18, 5 * nb_img_to_show))
for i, file in enumerate(sorted(img_to_show)):
    for pan_idx, panel_name in enumerate(panel_names):
        # Load masks and images
        file_name = file.name.replace(panel_names[0], panel_names[pan_idx])
        cur_masks_dir = folders['masks_islets'] / panel_name
        cur_img_dir = folders['seg_islets'] / panel_name
        img = io.read_image(cur_img_dir / file_name)
        mask = io.read_mask(cur_masks_dir / file_name)
        # Normalize image and convert to grayscale
        cur_img = exposure.adjust_sigmoid(img[0,...], 0.1)
        cur_img = color.gray2rgb(cur_img*255).astype('uint8')
        # Color objects according to the map defined above, identify borders
        cur_mask = cmaplist[mask].astype('uint8')
        borders = mark_boundaries(cur_img, mask, mode='thick').astype('uint8')
        # Create a overlay with images, colored masks and borders
        overlay = cv2.addWeighted(cur_img, (1-overlay_alpha),
                                  cur_mask, overlay_alpha, 0)
        overlay = cv2.addWeighted(overlay, 1, borders*255, 0.5, 0)
        # Display images and overlays
        ax[i,pan_idx].imshow(img[0,:,:], vmax = max_intensity)
        ax[i,pan_idx].set_title(file_name)
        ax[i,pan_idx].axis('off')
        ax[i,pan_idx+2].imshow(overlay, vmax = 1)
        ax[i,pan_idx+2].axis('off')
All images are visually checked and problematic images are flagged for deletion. The reason for which an image was flagged is indicated as an inline comment (e.g., #2), with numbers corresponding to the following code:
Manually flag images
# Manually flagged images: {case_id: [ROI numbers]}. The trailing comments
# give the reason code (see the code list above) for each flagged ROI.
flagged_images = {
    "6043": ["033"], #2
    "6061": ["016"], #4
    "6063": ["041", "056", "069"], #4, #2, #1
    "6090": ["051", "061"], #1
    "6135": ["063", "075"], #1
    "6145": ["035"], #1
    "6150": ["006", "046"], #2, #1
    "6171": ["031", "066"], #3, #1
    "6181": ["009", "011", "013", "014", "017", "019",\
             "020", "021", "022", "028", "036", "037",\
             "045", "049", "050", "052", "053", "062",\
             "065", "066", "067", "074"], #see note above
    "6197": ["068"], #1
    "6208": ["013"], #1
    "6209": ["021"], #1
    "6224": ["002"], #1
    "6228": ["027","031","035","040","050","055","075"], #1
    "6234": ["045", "081"], #3, #2
    "6247": ["003", "013", "024", "047", "051", "066"], #2
    "6285": ["076"], #2
    "6289": ["075"], #2
    "6301": ["063", "079"], #1, #2
    "6303": ["072"], #2
    "6310": ["007", "062"], #2
    "6324": ["033", "053"], #1, #2
    "6388": ["036", "051", "074"], #2
    "6396": ["025"], #3
    "6397": ["071"], #1
    "6414": ["075"], #2
    "6418": ["032"], #1
    "6428": ["020"], #3
    "6437": ["019"], #2
    "6450": ["011", "020", "055"], #1
    "6483": ["025"], #2
    "6494": ["011", "016", "060"], #2
    "6506": ["022", "027", "028", "029"], #1 #2 #4 #4
    "6509": ["016"], #4
    "6510": ["060", "064"], #1
    "6512": ["022", "053"], #2
    "6514": ["044"], #2
    "6517": ["008", "038", "061", "073"], #1, #1, #1, #4
    "6520": ["059"], #4
    "6538": ["087"], #4
    "6547": ["042"], #2
    "6549": ["035"], #4
    "6550": ["028"], #4
    "6551": ["023"], #4
    "6563": ["054"] #4
}
Delete flagged images
# Safety switch: set to False to keep flagged files on disk
delete_flagged_images = True
# Make a list of flagged images, encoded as "<case>__<roi>"
flagged_img_list = list(chain.from_iterable(map(lambda x: [x[0] + "__" + str(i) for i in x[1]], flagged_images.items())))
# Directories from which the images should be deleted
target_directories = [folders["img"], folders["seg_islets"], folders["masks_islets"]]
if flagged_img_list and delete_flagged_images:
    for panel_name in panel_names:
        for directory in target_directories:
            cur_dir = directory / panel_name
            # "__" expands to "_<panel>_ROI_" to rebuild the full file name
            images_to_delete = [
                cur_dir / (im.replace("__", ("_" + panel_name + "_ROI_")) + ".tiff") \
                for im in flagged_img_list]
            for image in images_to_delete:
                # missing_ok: a file may already be absent in some directories
                Path.unlink(image, missing_ok=True)
Remove binary masks
These masks are generated during an intermediate step of the islet segmentation process and are not needed anymore.
# Binary masks ("*_bin.tiff") are intermediate outputs of the islet
# segmentation step and are no longer needed; remove them from disk.
bin_masks = [
    mask
    for mask in io.list_mask_files(folders["masks_islets"])
    if mask.name.endswith("_bin.tiff")
]
for bin_mask in bin_masks:
    # missing_ok guards against the mask having been removed already
    # (e.g. by the flagged-image cleanup above or a previous run).
    bin_mask.unlink(missing_ok=True)
The next step in this pipeline is cell segmentation, which is performed with the 03_CellSegmentation.ipynb notebook.
!conda list
# packages in environment at /opt/conda: # # Name Version Build Channel _libgcc_mutex 0.1 main _openmp_mutex 4.5 1_gnu absl-py 1.3.0 pypi_0 pypi albumentations 1.3.0 pypi_0 pypi anndata 0.8.0 pypi_0 pypi anyio 3.5.0 py39h06a4308_0 argon2-cffi 21.3.0 pyhd3eb1b0_0 argon2-cffi-bindings 21.2.0 py39h7f8727e_0 asttokens 2.0.5 pyhd3eb1b0_0 astunparse 1.6.3 pypi_0 pypi attrs 22.1.0 py39h06a4308_0 babel 2.9.1 pyhd3eb1b0_0 backcall 0.2.0 pyhd3eb1b0_0 beautifulsoup4 4.11.1 py39h06a4308_0 blas 1.0 mkl bleach 4.1.0 pyhd3eb1b0_0 brotlipy 0.7.0 py39h27cfd23_1003 bzip2 1.0.8 h7b6447c_0 ca-certificates 2022.10.11 h06a4308_0 cachetools 5.2.0 pypi_0 pypi certifi 2022.9.24 py39h06a4308_0 cffi 1.15.0 py39hd667e15_1 charset-normalizer 2.0.4 pyhd3eb1b0_0 click 8.1.3 pypi_0 pypi click-log 0.4.0 pypi_0 pypi colorama 0.4.4 pyhd3eb1b0_0 conda 22.11.1 py39h06a4308_4 conda-content-trust 0.1.1 pyhd3eb1b0_0 conda-package-handling 1.8.1 py39h7f8727e_0 contourpy 1.0.6 pypi_0 pypi cpuonly 2.0 0 pytorch cryptography 36.0.0 py39h9ce1e76_0 cycler 0.11.0 pypi_0 pypi cython 0.29.32 pypi_0 pypi debugpy 1.5.1 py39h295c915_0 decorator 5.1.1 pyhd3eb1b0_0 deepcell 0.12.0 pypi_0 pypi deepcell-toolbox 0.11.2 pypi_0 pypi deepcell-tracking 0.6.3 pypi_0 pypi defusedxml 0.7.1 pyhd3eb1b0_0 efficientnet-pytorch 0.7.1 pypi_0 pypi entrypoints 0.4 py39h06a4308_0 executing 0.8.3 pyhd3eb1b0_0 fcswrite 0.6.1 pypi_0 pypi ffmpeg 4.3 hf484d3e_0 pytorch flatbuffers 22.12.6 pypi_0 pypi flit-core 3.6.0 pyhd3eb1b0_0 fonttools 4.38.0 pypi_0 pypi freetype 2.11.0 h70c0345_0 gast 0.5.3 pypi_0 pypi giflib 5.2.1 h7b6447c_0 gmp 6.2.1 h295c915_3 gnutls 3.6.15 he1e5248_0 google-auth 2.15.0 pypi_0 pypi google-auth-oauthlib 0.4.6 pypi_0 pypi google-pasta 0.2.0 pypi_0 pypi grpcio 1.51.1 pypi_0 pypi h5py 3.6.0 pypi_0 pypi icu 58.2 he6710b0_3 idna 3.3 pyhd3eb1b0_0 imageio 2.21.3 pypi_0 pypi importlib-metadata 5.2.0 pypi_0 pypi intel-openmp 2021.4.0 h06a4308_3561 ipykernel 6.15.2 py39h06a4308_0 ipython 8.7.0 py39h06a4308_0 ipython_genutils 
0.2.0 pyhd3eb1b0_1 jedi 0.18.1 py39h06a4308_1 jinja2 3.1.2 py39h06a4308_0 joblib 1.2.0 pypi_0 pypi jpeg 9e h7f8727e_0 json5 0.9.6 pyhd3eb1b0_0 jsonschema 4.16.0 py39h06a4308_0 jupyter_client 7.4.7 py39h06a4308_0 jupyter_core 4.11.2 py39h06a4308_0 jupyter_server 1.18.1 py39h06a4308_0 jupyterlab 3.5.0 py39h06a4308_0 jupyterlab_pygments 0.1.2 py_0 jupyterlab_server 2.16.3 py39h06a4308_0 keras 2.8.0 pypi_0 pypi keras-preprocessing 1.1.2 pypi_0 pypi kiwisolver 1.4.4 pypi_0 pypi lame 3.100 h7b6447c_0 lcms2 2.12 h3be6417_0 ld_impl_linux-64 2.35.1 h7274673_9 lerc 3.0 h295c915_0 libclang 14.0.6 pypi_0 pypi libdeflate 1.8 h7f8727e_5 libffi 3.3 he6710b0_2 libgcc-ng 11.2.0 h1234567_1 libgomp 11.2.0 h1234567_1 libiconv 1.16 h7f8727e_2 libidn2 2.3.2 h7f8727e_0 libpng 1.6.37 hbc83047_0 libsodium 1.0.18 h7b6447c_0 libstdcxx-ng 11.2.0 h1234567_1 libtasn1 4.16.0 h27cfd23_0 libtiff 4.4.0 hecacb30_0 libunistring 0.9.10 h27cfd23_0 libwebp 1.2.4 h11a3e52_0 libwebp-base 1.2.4 h5eee18b_0 libxml2 2.9.14 h74e7548_0 libxslt 1.1.35 h4e12654_0 lxml 4.9.2 pypi_0 pypi lz4-c 1.9.4 h6a678d5_0 markdown 3.4.1 pypi_0 pypi markupsafe 2.1.1 py39h7f8727e_0 matplotlib 3.6.2 pypi_0 pypi matplotlib-inline 0.1.6 py39h06a4308_0 mistune 0.8.4 py39h27cfd23_1000 mkl 2021.4.0 h06a4308_640 mkl-service 2.4.0 py39h7f8727e_0 mkl_fft 1.3.1 py39hd3c417c_0 mkl_random 1.2.2 py39h51133e4_0 munch 2.5.0 pypi_0 pypi natsort 8.2.0 pypi_0 pypi nbclassic 0.4.8 py39h06a4308_0 nbclient 0.5.13 py39h06a4308_0 nbconvert 6.5.4 py39h06a4308_0 nbformat 5.7.0 py39h06a4308_0 ncurses 6.3 h7f8727e_2 nest-asyncio 1.5.5 py39h06a4308_0 nettle 3.7.3 hbbd107a_1 networkx 2.8.6 pypi_0 pypi notebook 6.5.2 py39h06a4308_0 notebook-shim 0.2.2 py39h06a4308_0 numpy 1.23.3 pypi_0 pypi numpy-base 1.23.4 py39h31eccc5_0 oauthlib 3.2.2 pypi_0 pypi opencv-contrib-python 4.6.0.66 pypi_0 pypi opencv-python-headless 4.6.0.66 pypi_0 pypi openh264 2.1.1 h4ff587b_0 openssl 1.1.1s h7f8727e_0 opt-einsum 3.3.0 pypi_0 pypi packaging 22.0 pypi_0 pypi pandas 1.4.4 
pypi_0 pypi pandocfilters 1.5.0 pyhd3eb1b0_0 parso 0.8.3 pyhd3eb1b0_0 pexpect 4.8.0 pyhd3eb1b0_3 pickleshare 0.7.5 pyhd3eb1b0_1003 pillow 9.3.0 pypi_0 pypi pip 21.2.4 py39h06a4308_0 pluggy 1.0.0 py39h06a4308_1 pretrainedmodels 0.7.4 pypi_0 pypi prometheus_client 0.14.1 py39h06a4308_0 prompt-toolkit 3.0.20 pyhd3eb1b0_0 protobuf 3.20.3 pypi_0 pypi psutil 5.9.0 py39h5eee18b_0 ptyprocess 0.7.0 pyhd3eb1b0_2 pure_eval 0.2.2 pyhd3eb1b0_0 pyasn1 0.4.8 pypi_0 pypi pyasn1-modules 0.2.8 pypi_0 pypi pycosat 0.6.3 py39h27cfd23_0 pycparser 2.21 pyhd3eb1b0_0 pydot 1.4.2 pypi_0 pypi pygments 2.11.2 pyhd3eb1b0_0 pyopenssl 22.0.0 pyhd3eb1b0_0 pyparsing 3.0.9 py39h06a4308_0 pyrsistent 0.18.0 py39heee7806_0 pysocks 1.7.1 py39h06a4308_0 python 3.9.12 h12debd9_0 python-dateutil 2.8.2 pyhd3eb1b0_0 python-fastjsonschema 2.16.2 py39h06a4308_0 pytorch 1.13.1 py3.9_cpu_0 pytorch pytorch-mutex 1.0 cpu pytorch pytz 2022.7 pypi_0 pypi pywavelets 1.4.1 pypi_0 pypi pyyaml 6.0 pypi_0 pypi pyzmq 23.2.0 py39h6a678d5_0 qudida 0.0.4 pypi_0 pypi readimc 0.6.1 pypi_0 pypi readline 8.1.2 h7f8727e_1 requests 2.27.1 pyhd3eb1b0_0 requests-oauthlib 1.3.1 pypi_0 pypi rsa 4.9 pypi_0 pypi ruamel.yaml 0.17.21 py39h5eee18b_0 ruamel.yaml.clib 0.2.6 py39h5eee18b_1 ruamel_yaml 0.15.100 py39h27cfd23_0 scikit-image 0.19.3 pypi_0 pypi scikit-learn 1.2.0 pypi_0 pypi scipy 1.9.1 pypi_0 pypi segmentation-models-pytorch 0.3.1 pypi_0 pypi send2trash 1.8.0 pyhd3eb1b0_1 setuptools 61.2.0 py39h06a4308_0 six 1.16.0 pyhd3eb1b0_1 sniffio 1.2.0 py39h06a4308_1 soupsieve 2.3.2.post1 py39h06a4308_0 spektral 1.0.6 pypi_0 pypi sqlite 3.38.2 hc218d9a_0 stack_data 0.2.0 pyhd3eb1b0_0 steinbock 0.15.0 pypi_0 pypi tensorboard 2.8.0 pypi_0 pypi tensorboard-data-server 0.6.1 pypi_0 pypi tensorboard-plugin-wit 1.8.1 pypi_0 pypi tensorflow 2.8.0 pypi_0 pypi tensorflow-addons 0.16.1 pypi_0 pypi tensorflow-io-gcs-filesystem 0.29.0 pypi_0 pypi termcolor 2.1.1 pypi_0 pypi terminado 0.13.1 py39h06a4308_0 tf-estimator-nightly 2.8.0.dev2021122109 
pypi_0 pypi threadpoolctl 3.1.0 pypi_0 pypi tifffile 2022.8.12 pypi_0 pypi timm 0.4.12 pypi_0 pypi tinycss2 1.2.1 py39h06a4308_0 tk 8.6.11 h1ccaba5_0 tomli 2.0.1 py39h06a4308_0 toolz 0.12.0 py39h06a4308_0 torchaudio 0.13.1 py39_cpu pytorch torchvision 0.14.1 py39_cpu pytorch tornado 6.2 py39h5eee18b_0 tqdm 4.63.0 pyhd3eb1b0_0 traitlets 5.7.1 py39h06a4308_0 typeguard 2.13.3 pypi_0 pypi typing-extensions 4.4.0 py39h06a4308_0 typing_extensions 4.4.0 py39h06a4308_0 tzdata 2022a hda174b7_0 urllib3 1.26.8 pyhd3eb1b0_0 wcwidth 0.2.5 pyhd3eb1b0_0 webencodings 0.5.1 py39h06a4308_1 websocket-client 0.58.0 py39h06a4308_4 werkzeug 2.2.2 pypi_0 pypi wheel 0.37.1 pyhd3eb1b0_0 wrapt 1.14.1 pypi_0 pypi xtiff 0.7.8 pypi_0 pypi xz 5.2.5 h7b6447c_0 yaml 0.2.5 h7b6447c_0 zeromq 4.3.4 h2531618_0 zipp 3.11.0 pypi_0 pypi zlib 1.2.12 h7f8727e_1 zstd 1.5.2 ha4553b6_0